# Wild bootstrap for Meng, Qian and Yared (2015)

# Version 2: added wildboot_joint for testing joint equality to 0


# setwd("D:/Google Drive/2018-19/Canay/Wild Bootstrap/Meng-Qian-Yared/Wildboot")


rm(list=ls())
library("foreign")
library("xtable")
source("Functions.Wildboot.R")


outcome <- "ldeath_b"
testvar <- "lgrain_pred_famdum5860"
testvar_2 <- "lgrain_famdum5860"

control_1 <- c("lgrain_pred", "lurbpop", "ltotpop")
control_2 <- c("lgrain", "lurbpop", "ltotpop")
clustvar <- "prov_clust"
year_fe <- paste0("ydum", 6:34) # omit ydum5 (1953) to prevent collinearity
year_fe_short <- paste0("ydum", 6:17) # omit ydum5 (1953) to prevent collinearity

# Tolerance for Numerical Error in Wild Bootstrap
numerrtol <- 0

# Variable definitions for testing A2.2(iii)
Z <- c("lgrain_pred_famdum5860", "lgrain_pred")
Z_noncons <- c("lgrain_famdum5860", "lgrain")
W <- c("lurbpop", "ltotpop", year_fe)
W_short <- c("lurbpop", "ltotpop", year_fe_short)


#-------------------------------------------------------------------
# Data for Column 1
#-------------------------------------------------------------------

data <- read.dta("temp_prov_processed.dta")

# Generate Cluster variable
provincecodes <- as.numeric(row.names(table(data$prov)))
prov_clust <- (1:length(data[[outcome]]))*0
for (i in 1:length(provincecodes)){
  prov_clust[data$prov == provincecodes[i]] <- i
}
data <- cbind(prov_clust, data)
table(data$prov_clust, data$prov)


#-------------------------------------------------------------------
# Data for Column 2: Column 1, 1953 - 1965
#-------------------------------------------------------------------
data_short <- data[data$year < 1966,]


#-------------------------------------------------------------------
# Data for Column 3: with autonomous regions
#-------------------------------------------------------------------

data_auto <- read.dta("temp_prov_include_auto_processed.dta")

# Generate Cluster variable
provincecodes <- as.numeric(row.names(table(data_auto$prov)))
prov_clust <- (1:length(data_auto[[outcome]]))*0
for (i in 1:length(provincecodes)){
  prov_clust[data_auto$prov == provincecodes[i]] <- i
}
data_auto <- cbind(prov_clust, data_auto)
table(data_auto$prov_clust, data_auto$prov)


#-------------------------------------------------------------------
# Data for Column 4: Column 3, 1953 - 1965
#-------------------------------------------------------------------
data_auto_short <- data_auto[data_auto$year < 1966,]


#-------------------------------------------------------------------
# Perform Wild Bootstrap
#-------------------------------------------------------------------

testvar_joint <- c("lgrain_pred_famdum5860", "lgrain_pred")
testvar_2_joint <- c("lgrain_famdum5860", "lgrain") # variables to use for columns 5 and 6
control_joint <- c("lurbpop", "ltotpop")

col_1_ols_nofe <- wildboot(data, outcome, testvar, c(control_1, year_fe), clustvar, 0, numerrtol)
col_1_ols_fe <- wildboot(data, outcome, testvar, c(control_1, year_fe), clustvar, 1, numerrtol)
col_1_ols_nofe_joint <- wildboot_joint(data, outcome, testvar_joint, c(control_joint, year_fe), clustvar, 0, numerrtol)
col_1_ols_fe_joint <- wildboot_joint(data, outcome, testvar_joint, c(control_joint, year_fe), clustvar, 1, numerrtol)

col_2_ols_nofe <- wildboot(data_short, outcome, testvar, c(control_1, year_fe_short), clustvar, 0, numerrtol)
col_2_ols_fe <- wildboot(data_short, outcome, testvar, c(control_1, year_fe_short), clustvar, 1, numerrtol)
col_2_ols_nofe_joint <- wildboot_joint(data_short, outcome, testvar_joint, c(control_joint, year_fe_short), clustvar, 0, numerrtol)
col_2_ols_fe_joint <- wildboot_joint(data_short, outcome, testvar_joint, c(control_joint, year_fe_short), clustvar, 1, numerrtol)

col_3_ols_nofe <- wildboot(data_auto, outcome, testvar, c(control_1, year_fe), clustvar, 0, numerrtol)
col_3_ols_fe <- wildboot(data_auto, outcome, testvar, c(control_1, year_fe), clustvar, 1, numerrtol)
col_3_ols_nofe_joint <- wildboot_joint(data_auto, outcome, testvar_joint, c(control_joint, year_fe), clustvar, 0, numerrtol)
col_3_ols_fe_joint <- wildboot_joint(data_auto, outcome, testvar_joint, c(control_joint, year_fe), clustvar, 1, numerrtol)

col_4_ols_nofe <- wildboot(data_auto_short, outcome, testvar, c(control_1, year_fe_short), clustvar, 0, numerrtol)
col_4_ols_fe <- wildboot(data_auto_short, outcome, testvar, c(control_1, year_fe_short), clustvar, 1, numerrtol)
col_4_ols_nofe_joint <- wildboot_joint(data_auto_short, outcome, testvar_joint, c(control_joint, year_fe_short), clustvar, 0, numerrtol)
col_4_ols_fe_joint <- wildboot_joint(data_auto_short, outcome, testvar_joint, c(control_joint, year_fe_short), clustvar, 1, numerrtol)

col_5_ols_nofe <- wildboot(data, outcome, testvar_2, c(control_2, year_fe), clustvar, 0, numerrtol)
col_5_ols_fe <- wildboot(data, outcome, testvar_2, c(control_2, year_fe), clustvar, 1, numerrtol)
col_5_ols_nofe_joint <- wildboot_joint(data, outcome, testvar_2_joint, c(control_joint, year_fe), clustvar, 0, numerrtol)
col_5_ols_fe_joint <- wildboot_joint(data, outcome, testvar_2_joint, c(control_joint, year_fe), clustvar, 1, numerrtol)

col_6_ols_nofe <- wildboot(data_short, outcome, testvar_2, c(control_2, year_fe_short), clustvar, 0, numerrtol)
col_6_ols_fe <- wildboot(data_short, outcome, testvar_2, c(control_2, year_fe_short), clustvar, 1, numerrtol)
col_6_ols_nofe_joint <- wildboot_joint(data_short, outcome, testvar_2_joint, c(control_joint, year_fe_short), clustvar, 0, numerrtol)
col_6_ols_fe_joint <- wildboot_joint(data_short, outcome, testvar_2_joint, c(control_joint, year_fe_short), clustvar, 1, numerrtol)

# Assumption 2.2(iii)

col_1_ols_nofe_mat <- wildboot_clustcov(data, Z, W, clustvar, 0)
col_1_ols_fe_mat <- wildboot_clustcov(data, Z, W, clustvar, 1)

col_2_ols_nofe_mat <- wildboot_clustcov(data_short, Z, W_short, clustvar, 0)
col_2_ols_fe_mat <- wildboot_clustcov(data_short, Z, W_short, clustvar, 1)

col_3_ols_nofe_mat <- wildboot_clustcov(data_auto, Z, W, clustvar, 0)
col_3_ols_fe_mat <- wildboot_clustcov(data_auto, Z, W, clustvar, 1)

col_4_ols_nofe_mat <- wildboot_clustcov(data_auto_short, Z, W_short, clustvar, 0)
col_4_ols_fe_mat <- wildboot_clustcov(data_auto_short, Z, W_short, clustvar, 1)

col_5_ols_nofe_mat <- wildboot_clustcov(data, Z_noncons, W, clustvar, 0)
col_5_ols_fe_mat <- wildboot_clustcov(data, Z_noncons, W, clustvar, 1)

col_6_ols_nofe_mat <- wildboot_clustcov(data_short, Z_noncons, W_short, clustvar, 0)
col_6_ols_fe_mat <- wildboot_clustcov(data_short, Z_noncons, W_short, clustvar, 1)


#-------------------------------------------------------------------
# Output
#-------------------------------------------------------------------

digits <- 3

print2Excel(col_1_ols_nofe_mat, "col_1_ols_nofe_mat.xlsx", digits)
print2Excel(col_1_ols_fe_mat, "col_1_ols_fe_mat.xlsx", digits)
print2Excel(col_2_ols_nofe_mat, "col_2_ols_nofe_mat.xlsx", digits)
print2Excel(col_2_ols_fe_mat, "col_2_ols_fe_mat.xlsx", digits)
print2Excel(col_3_ols_nofe_mat, "col_3_ols_nofe_mat.xlsx", digits)
print2Excel(col_3_ols_fe_mat, "col_3_ols_fe_mat.xlsx", digits)
print2Excel(col_4_ols_nofe_mat, "col_4_ols_nofe_mat.xlsx", digits)
print2Excel(col_4_ols_fe_mat, "col_4_ols_fe_mat.xlsx", digits)
print2Excel(col_5_ols_nofe_mat, "col_5_ols_nofe_mat.xlsx", digits)
print2Excel(col_5_ols_fe_mat, "col_5_ols_fe_mat.xlsx", digits)
print2Excel(col_6_ols_nofe_mat, "col_6_ols_nofe_mat.xlsx", digits)
print2Excel(col_6_ols_fe_mat, "col_6_ols_fe_mat.xlsx", digits)


output <- rbind(col_1_ols_nofe$estimates, col_1_ols_fe$estimates, col_1_ols_nofe_joint$estimates, col_1_ols_fe_joint$estimates, 
                col_2_ols_nofe$estimates, col_2_ols_fe$estimates, col_2_ols_nofe_joint$estimates, col_2_ols_fe_joint$estimates,
                col_3_ols_nofe$estimates, col_3_ols_fe$estimates, col_3_ols_nofe_joint$estimates, col_3_ols_fe_joint$estimates,
                col_4_ols_nofe$estimates, col_4_ols_fe$estimates, col_4_ols_nofe_joint$estimates, col_4_ols_fe_joint$estimates,
                col_5_ols_nofe$estimates, col_5_ols_fe$estimates, col_5_ols_nofe_joint$estimates, col_5_ols_fe_joint$estimates,
                col_6_ols_nofe$estimates, col_6_ols_fe$estimates, col_6_ols_nofe_joint$estimates, col_6_ols_fe_joint$estimates)
output
row.names(output) <- c("col_1_ols_nofe", "col_1_ols_fe", "col_1_ols_nofe_joint", "col_1_ols_fe_joint",
                       "col_2_ols_nofe", "col_2_ols_fe", "col_2_ols_nofe_joint", "col_2_ols_fe_joint", 
                       "col_3_ols_nofe", "col_3_ols_fe", "col_3_ols_nofe_joint", "col_3_ols_fe_joint",
                       "col_4_ols_nofe", "col_4_ols_fe", "col_4_ols_nofe_joint", "col_4_ols_fe_joint",
                       "col_5_ols_nofe", "col_5_ols_fe", "col_5_ols_nofe_joint", "col_5_ols_fe_joint",
                       "col_6_ols_nofe", "col_6_ols_fe", "col_6_ols_nofe_joint", "col_6_ols_fe_joint")

write_xlsx(data.frame(round(output,5)), "output.xlsx")



# Summary statistic
col1_Z1 <- c(summary(data$lgrain_pred_famdum5860)[c(4,3,6,1)], sd(data$lgrain_pred_famdum5860))
col2_Z1 <- c(summary(data_short$lgrain_pred_famdum5860)[c(4,3,6,1)], sd(data_short$lgrain_pred_famdum5860))
col3_Z1 <- c(summary(data_auto$lgrain_pred_famdum5860)[c(4,3,6,1)], sd(data_auto$lgrain_pred_famdum5860))
col4_Z1 <- c(summary(data_auto_short$lgrain_pred_famdum5860)[c(4,3,6,1)], sd(data_auto_short$lgrain_pred_famdum5860))
col5_Z1 <- c(summary(data$lgrain_famdum5860)[c(4,3,6,1)], sd(data$lgrain_famdum5860))
col6_Z1 <- c(summary(data_short$lgrain_famdum5860)[c(4,3,6,1)], sd(data_short$lgrain_famdum5860))

col1_Z2 <- c(summary(data$lgrain_pred)[c(4,3,6,1)], sd(data$lgrain_pred))
col2_Z2 <- c(summary(data_short$lgrain_pred)[c(4,3,6,1)], sd(data_short$lgrain_pred))
col3_Z2 <- c(summary(data_auto$lgrain_pred)[c(4,3,6,1)], sd(data_auto$lgrain_pred))
col4_Z2 <- c(summary(data_auto_short$lgrain_pred)[c(4,3,6,1)], sd(data_auto_short$lgrain_pred))
col5_Z2 <- c(summary(data$lgrain)[c(4,3,6,1)], sd(data$lgrain))
col6_Z2 <- c(summary(data_short$lgrain)[c(4,3,6,1)], sd(data_short$lgrain))

summary_output <- rbind(col1_Z1, col2_Z1, col3_Z1, col4_Z1, col5_Z1, col6_Z1,
                        col1_Z2, col2_Z2, col3_Z2, col4_Z2, col5_Z2, col6_Z2)

write_xlsx(data.frame(round(summary_output,3)), "summary.xlsx")
